In [2]:
import copy, pathlib, math
import PIL.Image as Image
import sklearn
import numpy
import matplotlib
import matplotlib.pyplot as plt
print("scikit-learn version:", sklearn.__version__) # 1.1.3
print("numpy version:", numpy.__version__) # 1.23.4
print("matplotlib version:", matplotlib.__version__) # 3.6.2
scikit-learn version: 1.6.1 numpy version: 1.23.5 matplotlib version: 3.10.0
In [3]:
import numpy as np
size = 256
def load_image(file, size):
img = Image.open(file).convert("RGB") # Convert all images to RGB
img = img.resize((size, size))
return np.array(img).flatten()
def load_labelled_images(path, size):
labels = list()
files = list()
for file_info in pathlib.Path(path).glob("**/*"):
if file_info.suffix.lower() in ['.jpg', '.jpeg', '.png']:
labels.append(file_info.parent.name)
files.append(str(file_info))
imgs = numpy.array([load_image(f, size) for f in files])
return imgs, numpy.array(labels)
images, labels = load_labelled_images("./car_dataset", size)
print("Loaded", len(images), "images in the following", len(numpy.unique(labels)), "classes:")
for label in numpy.unique(labels):
print(label)
Loaded 179 images in the following 3 classes: audi bmw mercedes
In [7]:
sample_size = 24
plotimgs = copy.deepcopy(images)
numpy.random.shuffle(plotimgs)
rows = plotimgs[:sample_size]
_, subplots = plt.subplots(nrows = math.ceil(sample_size/8), ncols = 8, figsize=(18, int(sample_size/3)))
subplots = subplots.flatten()
for i, x in enumerate(rows):
subplots[i].imshow(numpy.reshape(x, [size, size, 3]))
subplots[i].set_xticks([])
subplots[i].set_yticks([])
In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=.3, random_state=0)
In [9]:
from sklearn.svm import SVC
model = SVC()
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print("Accuracy:", score)
Accuracy: 0.3148148148148148
In [10]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
report = classification_report(y_test, predictions)
print(report)
precision recall f1-score support
audi 0.39 0.67 0.49 21
bmw 0.17 0.21 0.19 14
mercedes 0.00 0.00 0.00 19
accuracy 0.31 54
macro avg 0.19 0.29 0.23 54
weighted avg 0.19 0.31 0.24 54
C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\sklearn\metrics\_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\sklearn\metrics\_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\sklearn\metrics\_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
In [11]:
_, subplots = plt.subplots(nrows = math.ceil(len(X_test)/4), ncols = 4, figsize=(15, len(X_test)))
subplots = subplots.flatten()
for i, x in enumerate(X_test):
subplots[i].imshow(numpy.reshape(x, [size, size, 3]))
subplots[i].set_xticks([])
subplots[i].set_yticks([])
subplots[i].set_title(predictions[i] + (" (correct)" if predictions[i] == y_test[i] else " (wrong)"))
In [12]:
from sklearn.svm import SVC
for C_value in [0.5, 1.0, 2.0]:
model = SVC(C=C_value)
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print(f"Accuracy with C={C_value}: {score:.4f}")
Accuracy with C=0.5: 0.3889 Accuracy with C=1.0: 0.3148 Accuracy with C=2.0: 0.2778
In [13]:
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
model = SVC(kernel=kernel)
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print(f"Accuracy with {kernel} kernel: {score:.4f}")
Accuracy with linear kernel: 0.2963 Accuracy with poly kernel: 0.3148 Accuracy with rbf kernel: 0.3148 Accuracy with sigmoid kernel: 0.3704